import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objs as go
import plotly.offline as py
from plotly.offline import iplot
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
# Read the raw player dataset; the first CSV column becomes the row index.
df = pd.read_csv(
    "C:/Users/Rahul Gupta/Documents/Dataset/football_data.csv",
    index_col=0,
)
# Preview the first rows of the raw frame.
df.head()
| ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | Value | Wage | Special | Preferred Foot | International Reputation | Weak Foot | Skill Moves | Work Rate | Body Type | Real Face | Position | Jersey Number | Joined | Loaned From | Contract Valid Until | Height | Weight | LS | ST | RS | LW | LF | CF | RF | RW | LAM | CAM | RAM | LM | LCM | CM | RCM | RM | LWB | LDM | CDM | RDM | RWB | LB | LCB | CB | RCB | RB | Crossing | Finishing | HeadingAccuracy | ShortPassing | Volleys | Dribbling | Curve | FKAccuracy | LongPassing | BallControl | Acceleration | SprintSpeed | Agility | Reactions | Balance | ShotPower | Jumping | Stamina | Strength | LongShots | Aggression | Interceptions | Positioning | Vision | Penalties | Composure | Marking | StandingTackle | SlidingTackle | GKDiving | GKHandling | GKKicking | GKPositioning | GKReflexes | Release Clause | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 158023 | L. Messi | 31 | https://cdn.sofifa.org/players/4/19/158023.png | Argentina | https://cdn.sofifa.org/flags/52.png | 94 | 94 | FC Barcelona | https://cdn.sofifa.org/teams/2/light/241.png | €110.5M | €565K | 2202 | Left | 5.0 | 4.0 | 4.0 | Medium/ Medium | Messi | Yes | RF | 10.0 | Jul 1, 2004 | NaN | 2021 | 5'7 | 159lbs | 88+2 | 88+2 | 88+2 | 92+2 | 93+2 | 93+2 | 93+2 | 92+2 | 93+2 | 93+2 | 93+2 | 91+2 | 84+2 | 84+2 | 84+2 | 91+2 | 64+2 | 61+2 | 61+2 | 61+2 | 64+2 | 59+2 | 47+2 | 47+2 | 47+2 | 59+2 | 84.0 | 95.0 | 70.0 | 90.0 | 86.0 | 97.0 | 93.0 | 94.0 | 87.0 | 96.0 | 91.0 | 86.0 | 91.0 | 95.0 | 95.0 | 85.0 | 68.0 | 72.0 | 59.0 | 94.0 | 48.0 | 22.0 | 94.0 | 94.0 | 75.0 | 96.0 | 33.0 | 28.0 | 26.0 | 6.0 | 11.0 | 15.0 | 14.0 | 8.0 | €226.5M |
| 1 | 20801 | Cristiano Ronaldo | 33 | https://cdn.sofifa.org/players/4/19/20801.png | Portugal | https://cdn.sofifa.org/flags/38.png | 94 | 94 | Juventus | https://cdn.sofifa.org/teams/2/light/45.png | €77M | €405K | 2228 | Right | 5.0 | 4.0 | 5.0 | High/ Low | C. Ronaldo | Yes | ST | 7.0 | Jul 10, 2018 | NaN | 2022 | 6'2 | 183lbs | 91+3 | 91+3 | 91+3 | 89+3 | 90+3 | 90+3 | 90+3 | 89+3 | 88+3 | 88+3 | 88+3 | 88+3 | 81+3 | 81+3 | 81+3 | 88+3 | 65+3 | 61+3 | 61+3 | 61+3 | 65+3 | 61+3 | 53+3 | 53+3 | 53+3 | 61+3 | 84.0 | 94.0 | 89.0 | 81.0 | 87.0 | 88.0 | 81.0 | 76.0 | 77.0 | 94.0 | 89.0 | 91.0 | 87.0 | 96.0 | 70.0 | 95.0 | 95.0 | 88.0 | 79.0 | 93.0 | 63.0 | 29.0 | 95.0 | 82.0 | 85.0 | 95.0 | 28.0 | 31.0 | 23.0 | 7.0 | 11.0 | 15.0 | 14.0 | 11.0 | €127.1M |
| 2 | 190871 | Neymar Jr | 26 | https://cdn.sofifa.org/players/4/19/190871.png | Brazil | https://cdn.sofifa.org/flags/54.png | 92 | 93 | Paris Saint-Germain | https://cdn.sofifa.org/teams/2/light/73.png | €118.5M | €290K | 2143 | Right | 5.0 | 5.0 | 5.0 | High/ Medium | Neymar | Yes | LW | 10.0 | Aug 3, 2017 | NaN | 2022 | 5'9 | 150lbs | 84+3 | 84+3 | 84+3 | 89+3 | 89+3 | 89+3 | 89+3 | 89+3 | 89+3 | 89+3 | 89+3 | 88+3 | 81+3 | 81+3 | 81+3 | 88+3 | 65+3 | 60+3 | 60+3 | 60+3 | 65+3 | 60+3 | 47+3 | 47+3 | 47+3 | 60+3 | 79.0 | 87.0 | 62.0 | 84.0 | 84.0 | 96.0 | 88.0 | 87.0 | 78.0 | 95.0 | 94.0 | 90.0 | 96.0 | 94.0 | 84.0 | 80.0 | 61.0 | 81.0 | 49.0 | 82.0 | 56.0 | 36.0 | 89.0 | 87.0 | 81.0 | 94.0 | 27.0 | 24.0 | 33.0 | 9.0 | 9.0 | 15.0 | 15.0 | 11.0 | €228.1M |
| 3 | 193080 | De Gea | 27 | https://cdn.sofifa.org/players/4/19/193080.png | Spain | https://cdn.sofifa.org/flags/45.png | 91 | 93 | Manchester United | https://cdn.sofifa.org/teams/2/light/11.png | €72M | €260K | 1471 | Right | 4.0 | 3.0 | 1.0 | Medium/ Medium | Lean | Yes | GK | 1.0 | Jul 1, 2011 | NaN | 2020 | 6'4 | 168lbs | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 17.0 | 13.0 | 21.0 | 50.0 | 13.0 | 18.0 | 21.0 | 19.0 | 51.0 | 42.0 | 57.0 | 58.0 | 60.0 | 90.0 | 43.0 | 31.0 | 67.0 | 43.0 | 64.0 | 12.0 | 38.0 | 30.0 | 12.0 | 68.0 | 40.0 | 68.0 | 15.0 | 21.0 | 13.0 | 90.0 | 85.0 | 87.0 | 88.0 | 94.0 | €138.6M |
| 4 | 192985 | K. De Bruyne | 27 | https://cdn.sofifa.org/players/4/19/192985.png | Belgium | https://cdn.sofifa.org/flags/7.png | 91 | 92 | Manchester City | https://cdn.sofifa.org/teams/2/light/10.png | €102M | €355K | 2281 | Right | 4.0 | 5.0 | 4.0 | High/ High | Normal | Yes | RCM | 7.0 | Aug 30, 2015 | NaN | 2023 | 5'11 | 154lbs | 82+3 | 82+3 | 82+3 | 87+3 | 87+3 | 87+3 | 87+3 | 87+3 | 88+3 | 88+3 | 88+3 | 88+3 | 87+3 | 87+3 | 87+3 | 88+3 | 77+3 | 77+3 | 77+3 | 77+3 | 77+3 | 73+3 | 66+3 | 66+3 | 66+3 | 73+3 | 93.0 | 82.0 | 55.0 | 92.0 | 82.0 | 86.0 | 85.0 | 83.0 | 91.0 | 91.0 | 78.0 | 76.0 | 79.0 | 91.0 | 77.0 | 91.0 | 63.0 | 90.0 | 75.0 | 91.0 | 76.0 | 61.0 | 87.0 | 94.0 | 79.0 | 88.0 | 68.0 | 58.0 | 51.0 | 15.0 | 13.0 | 5.0 | 10.0 | 13.0 | €196.4M |
# Dimensions of the raw frame as (rows, columns).
df.shape
(18207, 88)
# Print each column's dtype and non-null count for the raw frame.
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 18207 entries, 0 to 18206 Data columns (total 88 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ID 18207 non-null int64 1 Name 18207 non-null object 2 Age 18207 non-null int64 3 Photo 18207 non-null object 4 Nationality 18207 non-null object 5 Flag 18207 non-null object 6 Overall 18207 non-null int64 7 Potential 18207 non-null int64 8 Club 17966 non-null object 9 Club Logo 18207 non-null object 10 Value 18207 non-null object 11 Wage 18207 non-null object 12 Special 18207 non-null int64 13 Preferred Foot 18159 non-null object 14 International Reputation 18159 non-null float64 15 Weak Foot 18159 non-null float64 16 Skill Moves 18159 non-null float64 17 Work Rate 18159 non-null object 18 Body Type 18159 non-null object 19 Real Face 18159 non-null object 20 Position 18147 non-null object 21 Jersey Number 18147 non-null float64 22 Joined 16654 non-null object 23 Loaned From 1264 non-null object 24 Contract Valid Until 17918 non-null object 25 Height 18159 non-null object 26 Weight 18159 non-null object 27 LS 16122 non-null object 28 ST 16122 non-null object 29 RS 16122 non-null object 30 LW 16122 non-null object 31 LF 16122 non-null object 32 CF 16122 non-null object 33 RF 16122 non-null object 34 RW 16122 non-null object 35 LAM 16122 non-null object 36 CAM 16122 non-null object 37 RAM 16122 non-null object 38 LM 16122 non-null object 39 LCM 16122 non-null object 40 CM 16122 non-null object 41 RCM 16122 non-null object 42 RM 16122 non-null object 43 LWB 16122 non-null object 44 LDM 16122 non-null object 45 CDM 16122 non-null object 46 RDM 16122 non-null object 47 RWB 16122 non-null object 48 LB 16122 non-null object 49 LCB 16122 non-null object 50 CB 16122 non-null object 51 RCB 16122 non-null object 52 RB 16122 non-null object 53 Crossing 18159 non-null float64 54 Finishing 18159 non-null float64 55 HeadingAccuracy 18159 non-null float64 56 ShortPassing 18159 non-null float64 57 Volleys 18159 
non-null float64 58 Dribbling 18159 non-null float64 59 Curve 18159 non-null float64 60 FKAccuracy 18159 non-null float64 61 LongPassing 18159 non-null float64 62 BallControl 18159 non-null float64 63 Acceleration 18159 non-null float64 64 SprintSpeed 18159 non-null float64 65 Agility 18159 non-null float64 66 Reactions 18159 non-null float64 67 Balance 18159 non-null float64 68 ShotPower 18159 non-null float64 69 Jumping 18159 non-null float64 70 Stamina 18159 non-null float64 71 Strength 18159 non-null float64 72 LongShots 18159 non-null float64 73 Aggression 18159 non-null float64 74 Interceptions 18159 non-null float64 75 Positioning 18159 non-null float64 76 Vision 18159 non-null float64 77 Penalties 18159 non-null float64 78 Composure 18159 non-null float64 79 Marking 18159 non-null float64 80 StandingTackle 18159 non-null float64 81 SlidingTackle 18159 non-null float64 82 GKDiving 18159 non-null float64 83 GKHandling 18159 non-null float64 84 GKKicking 18159 non-null float64 85 GKPositioning 18159 non-null float64 86 GKReflexes 18159 non-null float64 87 Release Clause 16643 non-null object dtypes: float64(38), int64(5), object(45) memory usage: 12.4+ MB
# Raise the row display limit to 500 so the per-column summary is not truncated.
pd.set_option('display.max_rows',500)
# Number of missing values in each column of the raw frame.
df.isna().sum()
ID 0 Name 0 Age 0 Photo 0 Nationality 0 Flag 0 Overall 0 Potential 0 Club 241 Club Logo 0 Value 0 Wage 0 Special 0 Preferred Foot 48 International Reputation 48 Weak Foot 48 Skill Moves 48 Work Rate 48 Body Type 48 Real Face 48 Position 60 Jersey Number 60 Joined 1553 Loaned From 16943 Contract Valid Until 289 Height 48 Weight 48 LS 2085 ST 2085 RS 2085 LW 2085 LF 2085 CF 2085 RF 2085 RW 2085 LAM 2085 CAM 2085 RAM 2085 LM 2085 LCM 2085 CM 2085 RCM 2085 RM 2085 LWB 2085 LDM 2085 CDM 2085 RDM 2085 RWB 2085 LB 2085 LCB 2085 CB 2085 RCB 2085 RB 2085 Crossing 48 Finishing 48 HeadingAccuracy 48 ShortPassing 48 Volleys 48 Dribbling 48 Curve 48 FKAccuracy 48 LongPassing 48 BallControl 48 Acceleration 48 SprintSpeed 48 Agility 48 Reactions 48 Balance 48 ShotPower 48 Jumping 48 Stamina 48 Strength 48 LongShots 48 Aggression 48 Interceptions 48 Positioning 48 Vision 48 Penalties 48 Composure 48 Marking 48 StandingTackle 48 SlidingTackle 48 GKDiving 48 GKHandling 48 GKKicking 48 GKPositioning 48 GKReflexes 48 Release Clause 1564 dtype: int64
# Work on a copy; all cleaning below is applied to `data`, not to `df`.
data=df.copy()
Some of the columns in the dataset are irrelevant to this analysis, so we drop them.
# Remove the identifier, image-URL and other columns not needed further on.
data = data.drop(
    columns=[
        'ID',
        'Photo',
        'Flag',
        'Club Logo',
        'Loaned From',
        'Real Face',
        'Release Clause',
    ],
)
# Shape after the column drop.
data.shape
(18207, 81)
# Drop, in place, every row that has a missing value in any remaining column.
# NOTE(review): in the sample output the goalkeeper row (NaN in LS..RB) is gone
# after this step — confirm that excluding such rows is intended.
data.dropna(axis=0, inplace = True)
# Shape after the row drop.
data.shape
(14751, 81)
# Re-count missing values per column after the drop.
data.isna().sum()
Name 0 Age 0 Nationality 0 Overall 0 Potential 0 Club 0 Value 0 Wage 0 Special 0 Preferred Foot 0 International Reputation 0 Weak Foot 0 Skill Moves 0 Work Rate 0 Body Type 0 Position 0 Jersey Number 0 Joined 0 Contract Valid Until 0 Height 0 Weight 0 LS 0 ST 0 RS 0 LW 0 LF 0 CF 0 RF 0 RW 0 LAM 0 CAM 0 RAM 0 LM 0 LCM 0 CM 0 RCM 0 RM 0 LWB 0 LDM 0 CDM 0 RDM 0 RWB 0 LB 0 LCB 0 CB 0 RCB 0 RB 0 Crossing 0 Finishing 0 HeadingAccuracy 0 ShortPassing 0 Volleys 0 Dribbling 0 Curve 0 FKAccuracy 0 LongPassing 0 BallControl 0 Acceleration 0 SprintSpeed 0 Agility 0 Reactions 0 Balance 0 ShotPower 0 Jumping 0 Stamina 0 Strength 0 LongShots 0 Aggression 0 Interceptions 0 Positioning 0 Vision 0 Penalties 0 Composure 0 Marking 0 StandingTackle 0 SlidingTackle 0 GKDiving 0 GKHandling 0 GKKicking 0 GKPositioning 0 GKReflexes 0 dtype: int64
# Number of rows that are exact duplicates of an earlier row.
data.duplicated().sum()
0
#Cleaning the value and wage columns
def manipulate_mk(Value):
    """Convert a currency string such as '€110.5M' or '€565K' to a float.

    Args:
        Value: The raw cell value. Strings may carry a leading '€' and an
            'M' (millions) or 'K' (thousands) suffix. Numbers are accepted
            as-is so the cleaning step can be re-run on an already
            converted column.

    Returns:
        The amount as a float. Values that are neither strings nor
        convertible to float (e.g. None) yield NaN instead of None.

    Raises:
        ValueError: If a string does not contain a parseable number.
    """
    if not isinstance(Value, str):
        # Already numeric or missing: never fall through and return None.
        try:
            return float(Value)
        except (TypeError, ValueError):
            return float('nan')

    rep = Value.replace('€', '').strip()
    if 'M' in rep:
        return float(rep.replace('M', '')) * 1000000
    if 'K' in rep:
        return float(rep.replace('K', '')) * 1000
    return float(rep)
# Turn the currency strings in 'Value' and 'Wage' into plain numbers.
for money_col in ('Value', 'Wage'):
    data[money_col] = data[money_col].apply(manipulate_mk)

# Reduce 'Joined' and 'Contract Valid Until' to the calendar year, stored as
# a string; a missing year is written as 0 before the string conversion.
for date_col in ('Joined', 'Contract Valid Until'):
    parsed_dates = pd.to_datetime(data[date_col])
    data[date_col] = pd.DatetimeIndex(parsed_dates).year
    data[date_col] = data[date_col].replace(np.nan, 0).astype(str)
#Cleaning and Preprocessing the 'Height' Column
def clean_height(val):
    """Convert a height written as feet'inches (e.g. "5'7") to decimal feet.

    Args:
        val: The raw cell value. Strings are parsed as ``feet'inches``; a
            missing inches part ("6'" or "6") counts as zero inches. Numbers
            are passed through so re-running the step is harmless.

    Returns:
        The height in feet as a float. Values that are neither strings nor
        convertible to float (e.g. None) yield NaN rather than an empty list.

    Raises:
        ValueError: If a string's feet or inches part is not an integer.
    """
    if isinstance(val, str):
        feet, _, inches = val.partition("'")
        return (int(feet) * 12 + int(inches or 0)) / 12

    try:
        return float(val)
    except (TypeError, ValueError):
        return float('nan')
# Replace every entry of the 'Height' column with the result of clean_height.
data['Height']=data['Height'].apply(clean_height)
#Cleaning and Preprocessing the 'Weight' Column
def clean_weight(val):
    """Convert a weight string such as '159lbs' to a number of pounds.

    Args:
        val: The raw cell value. Strings have the 'lbs' suffix removed and
            are parsed as an integer. Numbers are passed through as floats.

    Returns:
        An int for string input, a float for numeric input, and NaN for
        values that cannot be converted (e.g. None) rather than an empty
        list, which would break a later ``astype(float)``.

    Raises:
        ValueError: If a string is not an integer once 'lbs' is removed.
    """
    if isinstance(val, str):
        return int(val.replace('lbs', ''))

    try:
        return float(val)
    except (TypeError, ValueError):
        return float('nan')
# Strip the unit from 'Weight' and store the column as float.
data['Weight'] = data['Weight'].apply(clean_weight).astype(float)
#Cleaning columns like 'LS','ST','RS',etc.
def remove_plus(val):
    """Collapse a 'base+bonus' rating string such as '88+2' into its sum.

    Args:
        val: The raw cell value. Strings are split on the first '+'; a
            string without '+' is treated as having a bonus of zero.
            Numbers are passed through as floats.

    Returns:
        An int for string input, a float for numeric input, and NaN for
        values that cannot be converted (e.g. None) rather than an empty
        list, which would break a later ``astype(float)``.

    Raises:
        ValueError: If either part of a string is not an integer.
    """
    if isinstance(val, str):
        base, _, bonus = val.partition('+')
        return int(base) + int(bonus or 0)

    try:
        return float(val)
    except (TypeError, ValueError):
        return float('nan')
# Position-rating columns that are cleaned with remove_plus.
col = [
    'LS', 'ST', 'RS', 'LW', 'LF', 'CF', 'RF', 'RW',
    'LAM', 'CAM', 'RAM', 'LM', 'LCM', 'CM', 'RCM', 'RM',
    'LWB', 'LDM', 'CDM', 'RDM', 'RWB',
    'LB', 'LCB', 'CB', 'RCB', 'RB',
]
# Replace each rating with the value returned by remove_plus, stored as float.
for rating_col in col:
    data[rating_col] = data[rating_col].apply(remove_plus).astype(float)

# Raise the column display limit to 500 before previewing the cleaned frame.
pd.set_option('display.max_columns', 500)
data.head()
| Name | Age | Nationality | Overall | Potential | Club | Value | Wage | Special | Preferred Foot | International Reputation | Weak Foot | Skill Moves | Work Rate | Body Type | Position | Jersey Number | Joined | Contract Valid Until | Height | Weight | LS | ST | RS | LW | LF | CF | RF | RW | LAM | CAM | RAM | LM | LCM | CM | RCM | RM | LWB | LDM | CDM | RDM | RWB | LB | LCB | CB | RCB | RB | Crossing | Finishing | HeadingAccuracy | ShortPassing | Volleys | Dribbling | Curve | FKAccuracy | LongPassing | BallControl | Acceleration | SprintSpeed | Agility | Reactions | Balance | ShotPower | Jumping | Stamina | Strength | LongShots | Aggression | Interceptions | Positioning | Vision | Penalties | Composure | Marking | StandingTackle | SlidingTackle | GKDiving | GKHandling | GKKicking | GKPositioning | GKReflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | L. Messi | 31 | Argentina | 94 | 94 | FC Barcelona | 110500000.0 | 565000.0 | 2202 | Left | 5.0 | 4.0 | 4.0 | Medium/ Medium | Messi | RF | 10.0 | 2004 | 2021 | 5.583333 | 159.0 | 90.0 | 90.0 | 90.0 | 94.0 | 95.0 | 95.0 | 95.0 | 94.0 | 95.0 | 95.0 | 95.0 | 93.0 | 86.0 | 86.0 | 86.0 | 93.0 | 66.0 | 63.0 | 63.0 | 63.0 | 66.0 | 61.0 | 49.0 | 49.0 | 49.0 | 61.0 | 84.0 | 95.0 | 70.0 | 90.0 | 86.0 | 97.0 | 93.0 | 94.0 | 87.0 | 96.0 | 91.0 | 86.0 | 91.0 | 95.0 | 95.0 | 85.0 | 68.0 | 72.0 | 59.0 | 94.0 | 48.0 | 22.0 | 94.0 | 94.0 | 75.0 | 96.0 | 33.0 | 28.0 | 26.0 | 6.0 | 11.0 | 15.0 | 14.0 | 8.0 |
| 1 | Cristiano Ronaldo | 33 | Portugal | 94 | 94 | Juventus | 77000000.0 | 405000.0 | 2228 | Right | 5.0 | 4.0 | 5.0 | High/ Low | C. Ronaldo | ST | 7.0 | 2018 | 2022 | 6.166667 | 183.0 | 94.0 | 94.0 | 94.0 | 92.0 | 93.0 | 93.0 | 93.0 | 92.0 | 91.0 | 91.0 | 91.0 | 91.0 | 84.0 | 84.0 | 84.0 | 91.0 | 68.0 | 64.0 | 64.0 | 64.0 | 68.0 | 64.0 | 56.0 | 56.0 | 56.0 | 64.0 | 84.0 | 94.0 | 89.0 | 81.0 | 87.0 | 88.0 | 81.0 | 76.0 | 77.0 | 94.0 | 89.0 | 91.0 | 87.0 | 96.0 | 70.0 | 95.0 | 95.0 | 88.0 | 79.0 | 93.0 | 63.0 | 29.0 | 95.0 | 82.0 | 85.0 | 95.0 | 28.0 | 31.0 | 23.0 | 7.0 | 11.0 | 15.0 | 14.0 | 11.0 |
| 2 | Neymar Jr | 26 | Brazil | 92 | 93 | Paris Saint-Germain | 118500000.0 | 290000.0 | 2143 | Right | 5.0 | 5.0 | 5.0 | High/ Medium | Neymar | LW | 10.0 | 2017 | 2022 | 5.750000 | 150.0 | 87.0 | 87.0 | 87.0 | 92.0 | 92.0 | 92.0 | 92.0 | 92.0 | 92.0 | 92.0 | 92.0 | 91.0 | 84.0 | 84.0 | 84.0 | 91.0 | 68.0 | 63.0 | 63.0 | 63.0 | 68.0 | 63.0 | 50.0 | 50.0 | 50.0 | 63.0 | 79.0 | 87.0 | 62.0 | 84.0 | 84.0 | 96.0 | 88.0 | 87.0 | 78.0 | 95.0 | 94.0 | 90.0 | 96.0 | 94.0 | 84.0 | 80.0 | 61.0 | 81.0 | 49.0 | 82.0 | 56.0 | 36.0 | 89.0 | 87.0 | 81.0 | 94.0 | 27.0 | 24.0 | 33.0 | 9.0 | 9.0 | 15.0 | 15.0 | 11.0 |
| 4 | K. De Bruyne | 27 | Belgium | 91 | 92 | Manchester City | 102000000.0 | 355000.0 | 2281 | Right | 4.0 | 5.0 | 4.0 | High/ High | Normal | RCM | 7.0 | 2015 | 2023 | 5.916667 | 154.0 | 85.0 | 85.0 | 85.0 | 90.0 | 90.0 | 90.0 | 90.0 | 90.0 | 91.0 | 91.0 | 91.0 | 91.0 | 90.0 | 90.0 | 90.0 | 91.0 | 80.0 | 80.0 | 80.0 | 80.0 | 80.0 | 76.0 | 69.0 | 69.0 | 69.0 | 76.0 | 93.0 | 82.0 | 55.0 | 92.0 | 82.0 | 86.0 | 85.0 | 83.0 | 91.0 | 91.0 | 78.0 | 76.0 | 79.0 | 91.0 | 77.0 | 91.0 | 63.0 | 90.0 | 75.0 | 91.0 | 76.0 | 61.0 | 87.0 | 94.0 | 79.0 | 88.0 | 68.0 | 58.0 | 51.0 | 15.0 | 13.0 | 5.0 | 10.0 | 13.0 |
| 5 | E. Hazard | 27 | Belgium | 91 | 91 | Chelsea | 93000000.0 | 340000.0 | 2142 | Right | 4.0 | 4.0 | 4.0 | High/ Medium | Normal | LF | 10.0 | 2012 | 2020 | 5.666667 | 163.0 | 86.0 | 86.0 | 86.0 | 92.0 | 91.0 | 91.0 | 91.0 | 92.0 | 92.0 | 92.0 | 92.0 | 92.0 | 85.0 | 85.0 | 85.0 | 92.0 | 69.0 | 66.0 | 66.0 | 66.0 | 69.0 | 63.0 | 52.0 | 52.0 | 52.0 | 63.0 | 81.0 | 84.0 | 61.0 | 89.0 | 80.0 | 95.0 | 83.0 | 79.0 | 83.0 | 94.0 | 94.0 | 88.0 | 95.0 | 90.0 | 94.0 | 82.0 | 56.0 | 83.0 | 66.0 | 80.0 | 54.0 | 41.0 | 87.0 | 89.0 | 86.0 | 91.0 | 34.0 | 27.0 | 22.0 | 11.0 | 12.0 | 6.0 | 8.0 | 8.0 |
# Print each column's dtype and non-null count for the cleaned frame.
data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 14751 entries, 0 to 18206 Data columns (total 81 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Name 14751 non-null object 1 Age 14751 non-null int64 2 Nationality 14751 non-null object 3 Overall 14751 non-null int64 4 Potential 14751 non-null int64 5 Club 14751 non-null object 6 Value 14751 non-null float64 7 Wage 14751 non-null float64 8 Special 14751 non-null int64 9 Preferred Foot 14751 non-null object 10 International Reputation 14751 non-null float64 11 Weak Foot 14751 non-null float64 12 Skill Moves 14751 non-null float64 13 Work Rate 14751 non-null object 14 Body Type 14751 non-null object 15 Position 14751 non-null object 16 Jersey Number 14751 non-null float64 17 Joined 14751 non-null object 18 Contract Valid Until 14751 non-null object 19 Height 14751 non-null float64 20 Weight 14751 non-null float64 21 LS 14751 non-null float64 22 ST 14751 non-null float64 23 RS 14751 non-null float64 24 LW 14751 non-null float64 25 LF 14751 non-null float64 26 CF 14751 non-null float64 27 RF 14751 non-null float64 28 RW 14751 non-null float64 29 LAM 14751 non-null float64 30 CAM 14751 non-null float64 31 RAM 14751 non-null float64 32 LM 14751 non-null float64 33 LCM 14751 non-null float64 34 CM 14751 non-null float64 35 RCM 14751 non-null float64 36 RM 14751 non-null float64 37 LWB 14751 non-null float64 38 LDM 14751 non-null float64 39 CDM 14751 non-null float64 40 RDM 14751 non-null float64 41 RWB 14751 non-null float64 42 LB 14751 non-null float64 43 LCB 14751 non-null float64 44 CB 14751 non-null float64 45 RCB 14751 non-null float64 46 RB 14751 non-null float64 47 Crossing 14751 non-null float64 48 Finishing 14751 non-null float64 49 HeadingAccuracy 14751 non-null float64 50 ShortPassing 14751 non-null float64 51 Volleys 14751 non-null float64 52 Dribbling 14751 non-null float64 53 Curve 14751 non-null float64 54 FKAccuracy 14751 non-null float64 55 LongPassing 14751 non-null float64 56 
BallControl 14751 non-null float64 57 Acceleration 14751 non-null float64 58 SprintSpeed 14751 non-null float64 59 Agility 14751 non-null float64 60 Reactions 14751 non-null float64 61 Balance 14751 non-null float64 62 ShotPower 14751 non-null float64 63 Jumping 14751 non-null float64 64 Stamina 14751 non-null float64 65 Strength 14751 non-null float64 66 LongShots 14751 non-null float64 67 Aggression 14751 non-null float64 68 Interceptions 14751 non-null float64 69 Positioning 14751 non-null float64 70 Vision 14751 non-null float64 71 Penalties 14751 non-null float64 72 Composure 14751 non-null float64 73 Marking 14751 non-null float64 74 StandingTackle 14751 non-null float64 75 SlidingTackle 14751 non-null float64 76 GKDiving 14751 non-null float64 77 GKHandling 14751 non-null float64 78 GKKicking 14751 non-null float64 79 GKPositioning 14751 non-null float64 80 GKReflexes 14751 non-null float64 dtypes: float64(68), int64(4), object(9) memory usage: 9.2+ MB
# Summary statistics for the numeric columns.
data.describe()
| Age | Overall | Potential | Value | Wage | Special | International Reputation | Weak Foot | Skill Moves | Jersey Number | Height | Weight | LS | ST | RS | LW | LF | CF | RF | RW | LAM | CAM | RAM | LM | LCM | CM | RCM | RM | LWB | LDM | CDM | RDM | RWB | LB | LCB | CB | RCB | RB | Crossing | Finishing | HeadingAccuracy | ShortPassing | Volleys | Dribbling | Curve | FKAccuracy | LongPassing | BallControl | Acceleration | SprintSpeed | Agility | Reactions | Balance | ShotPower | Jumping | Stamina | Strength | LongShots | Aggression | Interceptions | Positioning | Vision | Penalties | Composure | Marking | StandingTackle | SlidingTackle | GKDiving | GKHandling | GKKicking | GKPositioning | GKReflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 14751.000000 | 14751.000000 | 14751.000000 | 1.475100e+04 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 | 14751.000000 |
| mean | 25.122229 | 66.381466 | 71.331842 | 2.550233e+06 | 9987.526269 | 1666.424039 | 1.117348 | 3.001220 | 2.523422 | 19.519083 | 5.917345 | 164.105688 | 59.708969 | 59.708969 | 59.708969 | 60.937292 | 60.612501 | 60.612501 | 60.612501 | 60.937292 | 60.891668 | 60.891668 | 60.891668 | 61.656905 | 60.190835 | 60.190835 | 60.190835 | 61.656905 | 59.569453 | 58.907261 | 58.907261 | 58.907261 | 59.569453 | 58.883059 | 57.733849 | 57.733849 | 57.733849 | 58.883059 | 54.151922 | 49.552505 | 56.990238 | 62.581452 | 46.590536 | 60.420785 | 51.172937 | 46.409667 | 56.101213 | 63.061555 | 67.816826 | 67.878720 | 66.355637 | 62.165074 | 66.598332 | 59.475425 | 66.079384 | 67.372178 | 65.925632 | 51.255101 | 59.734662 | 50.638804 | 54.667887 | 55.398278 | 52.000678 | 60.308793 | 51.470748 | 52.119314 | 49.840689 | 10.504305 | 10.558199 | 10.560030 | 10.529456 | 10.506271 |
| std | 4.606411 | 6.890162 | 6.101229 | 5.832472e+06 | 22828.925608 | 198.213504 | 0.401174 | 0.635342 | 0.616368 | 15.699233 | 0.210182 | 14.777161 | 9.168249 | 9.168249 | 9.168249 | 9.925666 | 9.886656 | 9.886656 | 9.886656 | 9.925666 | 9.822199 | 9.822199 | 9.822199 | 9.312767 | 8.885386 | 8.885386 | 8.885386 | 9.312767 | 9.025698 | 10.126900 | 10.126900 | 10.126900 | 9.025698 | 9.565656 | 11.723744 | 11.723744 | 11.723744 | 9.565656 | 14.164751 | 16.344256 | 11.619529 | 9.845759 | 14.793798 | 12.551619 | 15.174564 | 15.107819 | 12.401771 | 10.047319 | 11.805581 | 11.537182 | 12.363567 | 8.867526 | 12.161967 | 13.262320 | 11.552322 | 11.283637 | 12.573126 | 15.766377 | 14.413970 | 18.750290 | 14.703792 | 12.937831 | 12.532761 | 10.233095 | 17.166916 | 19.082899 | 19.078293 | 3.099300 | 3.080962 | 3.144094 | 3.075248 | 3.103275 |
| min | 16.000000 | 46.000000 | 48.000000 | 0.000000e+00 | 1000.000000 | 1000.000000 | 1.000000 | 1.000000 | 2.000000 | 2.000000 | 5.083333 | 110.000000 | 33.000000 | 33.000000 | 33.000000 | 27.000000 | 29.000000 | 29.000000 | 29.000000 | 27.000000 | 29.000000 | 29.000000 | 29.000000 | 29.000000 | 32.000000 | 32.000000 | 32.000000 | 29.000000 | 32.000000 | 30.000000 | 30.000000 | 30.000000 | 32.000000 | 31.000000 | 27.000000 | 27.000000 | 27.000000 | 31.000000 | 11.000000 | 10.000000 | 15.000000 | 20.000000 | 10.000000 | 14.000000 | 11.000000 | 10.000000 | 19.000000 | 25.000000 | 20.000000 | 25.000000 | 23.000000 | 21.000000 | 22.000000 | 14.000000 | 28.000000 | 27.000000 | 25.000000 | 11.000000 | 13.000000 | 10.000000 | 11.000000 | 12.000000 | 12.000000 | 29.000000 | 10.000000 | 10.000000 | 10.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| 25% | 21.000000 | 62.000000 | 67.000000 | 3.500000e+05 | 1000.000000 | 1524.000000 | 1.000000 | 3.000000 | 2.000000 | 8.000000 | 5.750000 | 154.000000 | 53.000000 | 53.000000 | 53.000000 | 55.000000 | 54.000000 | 54.000000 | 54.000000 | 55.000000 | 55.000000 | 55.000000 | 55.000000 | 56.000000 | 54.000000 | 54.000000 | 54.000000 | 56.000000 | 53.000000 | 52.000000 | 52.000000 | 52.000000 | 53.000000 | 52.000000 | 48.000000 | 48.000000 | 48.000000 | 52.000000 | 44.000000 | 36.000000 | 49.000000 | 57.000000 | 35.000000 | 54.000000 | 39.000000 | 34.000000 | 49.000000 | 58.000000 | 62.000000 | 62.000000 | 59.000000 | 56.000000 | 60.000000 | 51.000000 | 59.000000 | 61.000000 | 59.000000 | 40.000000 | 50.000000 | 35.000000 | 46.000000 | 47.000000 | 42.000000 | 53.000000 | 38.000000 | 36.000000 | 33.000000 | 8.000000 | 8.000000 | 8.000000 | 8.000000 | 8.000000 |
| 50% | 25.000000 | 66.000000 | 71.000000 | 7.250000e+05 | 3000.000000 | 1669.000000 | 1.000000 | 3.000000 | 2.000000 | 17.000000 | 5.916667 | 163.000000 | 60.000000 | 60.000000 | 60.000000 | 62.000000 | 62.000000 | 62.000000 | 62.000000 | 62.000000 | 62.000000 | 62.000000 | 62.000000 | 63.000000 | 60.000000 | 60.000000 | 60.000000 | 63.000000 | 60.000000 | 60.000000 | 60.000000 | 60.000000 | 60.000000 | 60.000000 | 60.000000 | 60.000000 | 60.000000 | 60.000000 | 56.000000 | 52.000000 | 58.000000 | 64.000000 | 46.000000 | 62.000000 | 51.000000 | 44.000000 | 58.000000 | 64.000000 | 69.000000 | 69.000000 | 68.000000 | 62.000000 | 68.000000 | 61.000000 | 67.000000 | 68.000000 | 67.000000 | 54.000000 | 61.000000 | 56.000000 | 57.000000 | 57.000000 | 52.000000 | 61.000000 | 56.000000 | 59.000000 | 56.000000 | 10.000000 | 11.000000 | 11.000000 | 10.000000 | 10.000000 |
| 75% | 28.000000 | 71.000000 | 75.000000 | 2.200000e+06 | 9000.000000 | 1806.000000 | 1.000000 | 3.000000 | 3.000000 | 26.000000 | 6.083333 | 174.000000 | 66.000000 | 66.000000 | 66.000000 | 68.000000 | 67.000000 | 67.000000 | 67.000000 | 68.000000 | 68.000000 | 68.000000 | 68.000000 | 68.000000 | 66.000000 | 66.000000 | 66.000000 | 68.000000 | 66.000000 | 66.000000 | 66.000000 | 66.000000 | 66.000000 | 66.000000 | 67.000000 | 67.000000 | 67.000000 | 66.000000 | 65.000000 | 63.000000 | 65.000000 | 69.000000 | 58.000000 | 69.000000 | 63.000000 | 58.000000 | 65.000000 | 70.000000 | 76.000000 | 76.000000 | 75.000000 | 68.000000 | 75.000000 | 69.000000 | 74.000000 | 75.000000 | 75.000000 | 64.000000 | 70.000000 | 65.000000 | 65.000000 | 65.000000 | 61.000000 | 68.000000 | 65.000000 | 67.000000 | 65.000000 | 13.000000 | 13.000000 | 13.000000 | 13.000000 | 13.000000 |
| max | 41.000000 | 94.000000 | 95.000000 | 1.185000e+08 | 565000.000000 | 2346.000000 | 5.000000 | 5.000000 | 5.000000 | 99.000000 | 6.666667 | 243.000000 | 94.000000 | 94.000000 | 94.000000 | 94.000000 | 95.000000 | 95.000000 | 95.000000 | 94.000000 | 95.000000 | 95.000000 | 95.000000 | 93.000000 | 91.000000 | 91.000000 | 91.000000 | 93.000000 | 88.000000 | 90.000000 | 90.000000 | 90.000000 | 88.000000 | 87.000000 | 90.000000 | 90.000000 | 90.000000 | 87.000000 | 93.000000 | 95.000000 | 94.000000 | 93.000000 | 90.000000 | 97.000000 | 94.000000 | 94.000000 | 93.000000 | 96.000000 | 97.000000 | 96.000000 | 96.000000 | 96.000000 | 96.000000 | 95.000000 | 95.000000 | 96.000000 | 97.000000 | 94.000000 | 95.000000 | 92.000000 | 95.000000 | 94.000000 | 92.000000 | 96.000000 | 94.000000 | 93.000000 | 91.000000 | 37.000000 | 33.000000 | 41.000000 | 33.000000 | 37.000000 |
# Summary (count, unique, top, freq) for the object-dtype columns.
data.describe(include='O')
| Name | Nationality | Club | Preferred Foot | Work Rate | Body Type | Position | Joined | Contract Valid Until | |
|---|---|---|---|---|---|---|---|---|---|
| count | 14751 | 14751 | 14751 | 14751 | 14751 | 14751 | 14751 | 14751 | 14751 |
| unique | 14047 | 160 | 651 | 2 | 9 | 9 | 26 | 20 | 9 |
| top | J. Rodríguez | England | Arsenal | Right | Medium/ Medium | Normal | ST | 2018 | 2019 |
| freq | 8 | 1320 | 30 | 11121 | 7122 | 8379 | 1924 | 5934 | 4201 |
# Bar chart of the ten most frequent nationalities.
x = data['Nationality'].value_counts()
sns.barplot(x=x.iloc[:10], y=x.index[:10])
plt.ylabel("Nationality")
plt.xlabel("Frequency")
Text(0.5, 0, 'Frequency')
# Report how many distinct clubs appear in the cleaned frame.
print("Total number of clubs are:",data['Club'].nunique())
Total number of clubs are: 651
# Bar chart of the 20 clubs with the most players.
x = data['Club'].value_counts()
sns.barplot(x=x.iloc[:20], y=x.index[:20])
plt.ylabel("Club")
plt.xlabel("Frequency")
Text(0.5, 0, 'Frequency')
# Ten players with the highest 'Potential' rating.
top_potential = data.sort_values(by='Potential', ascending=False).iloc[:10]
sns.barplot(x='Potential', y='Name', data=top_potential)
plt.title("Top 10 Potential Players")
plt.xlabel("Potential")
plt.ylabel("Name")
Text(0, 0.5, 'Name')
# Ten players with the highest 'Overall' rating.
top_overall = data.sort_values(by='Overall', ascending=False).iloc[:10]
sns.barplot(x='Overall', y='Name', data=top_overall)
plt.title("Top 10 Overall Performance Players")
plt.xlabel("Overall")
plt.ylabel("Name")
Text(0, 0.5, 'Name')
# Ten most valuable players, ranked by the 'Value' column.
top_value = data.sort_values(by='Value', ascending=False).head(10)
# The names must come from the same frame as the values; taking them from
# top_overall would label the bars with a different set of players.
sns.barplot(x=top_value['Value'], y=top_value['Name'])
plt.title("Top 10 Valuable Players")
plt.xlabel("Value")
plt.ylabel("Name")
Text(0, 0.5, 'Name')
# Display the rows of the ten most valuable players.
top_value
| Name | Age | Nationality | Overall | Potential | Club | Value | Wage | Special | Preferred Foot | International Reputation | Weak Foot | Skill Moves | Work Rate | Body Type | Position | Jersey Number | Joined | Contract Valid Until | Height | Weight | LS | ST | RS | LW | LF | CF | RF | RW | LAM | CAM | RAM | LM | LCM | CM | RCM | RM | LWB | LDM | CDM | RDM | RWB | LB | LCB | CB | RCB | RB | Crossing | Finishing | HeadingAccuracy | ShortPassing | Volleys | Dribbling | Curve | FKAccuracy | LongPassing | BallControl | Acceleration | SprintSpeed | Agility | Reactions | Balance | ShotPower | Jumping | Stamina | Strength | LongShots | Aggression | Interceptions | Positioning | Vision | Penalties | Composure | Marking | StandingTackle | SlidingTackle | GKDiving | GKHandling | GKKicking | GKPositioning | GKReflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | Neymar Jr | 26 | Brazil | 92 | 93 | Paris Saint-Germain | 118500000.0 | 290000.0 | 2143 | Right | 5.0 | 5.0 | 5.0 | High/ Medium | Neymar | LW | 10.0 | 2017 | 2022 | 5.750000 | 150.0 | 87.0 | 87.0 | 87.0 | 92.0 | 92.0 | 92.0 | 92.0 | 92.0 | 92.0 | 92.0 | 92.0 | 91.0 | 84.0 | 84.0 | 84.0 | 91.0 | 68.0 | 63.0 | 63.0 | 63.0 | 68.0 | 63.0 | 50.0 | 50.0 | 50.0 | 63.0 | 79.0 | 87.0 | 62.0 | 84.0 | 84.0 | 96.0 | 88.0 | 87.0 | 78.0 | 95.0 | 94.0 | 90.0 | 96.0 | 94.0 | 84.0 | 80.0 | 61.0 | 81.0 | 49.0 | 82.0 | 56.0 | 36.0 | 89.0 | 87.0 | 81.0 | 94.0 | 27.0 | 24.0 | 33.0 | 9.0 | 9.0 | 15.0 | 15.0 | 11.0 |
| 0 | L. Messi | 31 | Argentina | 94 | 94 | FC Barcelona | 110500000.0 | 565000.0 | 2202 | Left | 5.0 | 4.0 | 4.0 | Medium/ Medium | Messi | RF | 10.0 | 2004 | 2021 | 5.583333 | 159.0 | 90.0 | 90.0 | 90.0 | 94.0 | 95.0 | 95.0 | 95.0 | 94.0 | 95.0 | 95.0 | 95.0 | 93.0 | 86.0 | 86.0 | 86.0 | 93.0 | 66.0 | 63.0 | 63.0 | 63.0 | 66.0 | 61.0 | 49.0 | 49.0 | 49.0 | 61.0 | 84.0 | 95.0 | 70.0 | 90.0 | 86.0 | 97.0 | 93.0 | 94.0 | 87.0 | 96.0 | 91.0 | 86.0 | 91.0 | 95.0 | 95.0 | 85.0 | 68.0 | 72.0 | 59.0 | 94.0 | 48.0 | 22.0 | 94.0 | 94.0 | 75.0 | 96.0 | 33.0 | 28.0 | 26.0 | 6.0 | 11.0 | 15.0 | 14.0 | 8.0 |
| 4 | K. De Bruyne | 27 | Belgium | 91 | 92 | Manchester City | 102000000.0 | 355000.0 | 2281 | Right | 4.0 | 5.0 | 4.0 | High/ High | Normal | RCM | 7.0 | 2015 | 2023 | 5.916667 | 154.0 | 85.0 | 85.0 | 85.0 | 90.0 | 90.0 | 90.0 | 90.0 | 90.0 | 91.0 | 91.0 | 91.0 | 91.0 | 90.0 | 90.0 | 90.0 | 91.0 | 80.0 | 80.0 | 80.0 | 80.0 | 80.0 | 76.0 | 69.0 | 69.0 | 69.0 | 76.0 | 93.0 | 82.0 | 55.0 | 92.0 | 82.0 | 86.0 | 85.0 | 83.0 | 91.0 | 91.0 | 78.0 | 76.0 | 79.0 | 91.0 | 77.0 | 91.0 | 63.0 | 90.0 | 75.0 | 91.0 | 76.0 | 61.0 | 87.0 | 94.0 | 79.0 | 88.0 | 68.0 | 58.0 | 51.0 | 15.0 | 13.0 | 5.0 | 10.0 | 13.0 |
| 5 | E. Hazard | 27 | Belgium | 91 | 91 | Chelsea | 93000000.0 | 340000.0 | 2142 | Right | 4.0 | 4.0 | 4.0 | High/ Medium | Normal | LF | 10.0 | 2012 | 2020 | 5.666667 | 163.0 | 86.0 | 86.0 | 86.0 | 92.0 | 91.0 | 91.0 | 91.0 | 92.0 | 92.0 | 92.0 | 92.0 | 92.0 | 85.0 | 85.0 | 85.0 | 92.0 | 69.0 | 66.0 | 66.0 | 66.0 | 69.0 | 63.0 | 52.0 | 52.0 | 52.0 | 63.0 | 81.0 | 84.0 | 61.0 | 89.0 | 80.0 | 95.0 | 83.0 | 79.0 | 83.0 | 94.0 | 94.0 | 88.0 | 95.0 | 90.0 | 94.0 | 82.0 | 56.0 | 83.0 | 66.0 | 80.0 | 54.0 | 41.0 | 87.0 | 89.0 | 86.0 | 91.0 | 34.0 | 27.0 | 22.0 | 11.0 | 12.0 | 6.0 | 8.0 | 8.0 |
| 15 | P. Dybala | 24 | Argentina | 89 | 94 | Juventus | 89000000.0 | 205000.0 | 2092 | Left | 3.0 | 3.0 | 4.0 | High/ Medium | Normal | LF | 21.0 | 2015 | 2022 | 5.833333 | 165.0 | 86.0 | 86.0 | 86.0 | 90.0 | 89.0 | 89.0 | 89.0 | 90.0 | 90.0 | 90.0 | 90.0 | 89.0 | 82.0 | 82.0 | 82.0 | 89.0 | 65.0 | 61.0 | 61.0 | 61.0 | 65.0 | 59.0 | 48.0 | 48.0 | 48.0 | 59.0 | 82.0 | 84.0 | 68.0 | 87.0 | 88.0 | 92.0 | 88.0 | 88.0 | 75.0 | 92.0 | 87.0 | 83.0 | 91.0 | 86.0 | 85.0 | 82.0 | 75.0 | 80.0 | 65.0 | 88.0 | 48.0 | 32.0 | 84.0 | 87.0 | 86.0 | 84.0 | 23.0 | 20.0 | 20.0 | 5.0 | 4.0 | 4.0 | 5.0 | 8.0 |
| 16 | H. Kane | 24 | England | 89 | 91 | Tottenham Hotspur | 83500000.0 | 205000.0 | 2165 | Right | 3.0 | 4.0 | 3.0 | High/ High | Normal | ST | 9.0 | 2010 | 2024 | 6.166667 | 196.0 | 89.0 | 89.0 | 89.0 | 85.0 | 87.0 | 87.0 | 87.0 | 85.0 | 85.0 | 85.0 | 85.0 | 84.0 | 82.0 | 82.0 | 82.0 | 84.0 | 68.0 | 69.0 | 69.0 | 69.0 | 68.0 | 65.0 | 63.0 | 63.0 | 63.0 | 65.0 | 75.0 | 94.0 | 85.0 | 80.0 | 84.0 | 80.0 | 78.0 | 68.0 | 82.0 | 84.0 | 68.0 | 72.0 | 71.0 | 91.0 | 71.0 | 88.0 | 78.0 | 89.0 | 84.0 | 85.0 | 76.0 | 35.0 | 93.0 | 80.0 | 90.0 | 89.0 | 56.0 | 36.0 | 38.0 | 8.0 | 10.0 | 11.0 | 14.0 | 11.0 |
| 25 | K. Mbappé | 19 | France | 88 | 95 | Paris Saint-Germain | 81000000.0 | 100000.0 | 2118 | Right | 3.0 | 4.0 | 5.0 | High/ Medium | Lean | RM | 10.0 | 2018 | 2022 | 5.833333 | 161.0 | 88.0 | 88.0 | 88.0 | 90.0 | 90.0 | 90.0 | 90.0 | 90.0 | 89.0 | 89.0 | 89.0 | 89.0 | 81.0 | 81.0 | 81.0 | 89.0 | 69.0 | 65.0 | 65.0 | 65.0 | 69.0 | 65.0 | 57.0 | 57.0 | 57.0 | 65.0 | 77.0 | 88.0 | 77.0 | 82.0 | 78.0 | 90.0 | 77.0 | 63.0 | 73.0 | 91.0 | 96.0 | 96.0 | 92.0 | 87.0 | 83.0 | 79.0 | 75.0 | 83.0 | 71.0 | 78.0 | 62.0 | 38.0 | 88.0 | 82.0 | 70.0 | 86.0 | 34.0 | 34.0 | 32.0 | 13.0 | 5.0 | 7.0 | 11.0 | 6.0 |
| 7 | L. Suárez | 31 | Uruguay | 91 | 91 | FC Barcelona | 80000000.0 | 455000.0 | 2346 | Right | 5.0 | 4.0 | 3.0 | High/ Medium | Normal | RS | 9.0 | 2014 | 2021 | 6.000000 | 190.0 | 92.0 | 92.0 | 92.0 | 91.0 | 92.0 | 92.0 | 92.0 | 91.0 | 90.0 | 90.0 | 90.0 | 89.0 | 84.0 | 84.0 | 84.0 | 89.0 | 74.0 | 73.0 | 73.0 | 73.0 | 74.0 | 71.0 | 68.0 | 68.0 | 68.0 | 71.0 | 77.0 | 93.0 | 77.0 | 82.0 | 88.0 | 87.0 | 86.0 | 84.0 | 64.0 | 90.0 | 86.0 | 75.0 | 82.0 | 92.0 | 83.0 | 86.0 | 69.0 | 90.0 | 83.0 | 85.0 | 87.0 | 41.0 | 92.0 | 84.0 | 85.0 | 85.0 | 62.0 | 45.0 | 38.0 | 27.0 | 25.0 | 31.0 | 33.0 | 37.0 |
| 17 | A. Griezmann | 27 | France | 89 | 90 | Atlético Madrid | 78000000.0 | 145000.0 | 2246 | Left | 4.0 | 3.0 | 4.0 | High/ High | Lean | CAM | 7.0 | 2014 | 2023 | 5.750000 | 161.0 | 89.0 | 89.0 | 89.0 | 90.0 | 90.0 | 90.0 | 90.0 | 90.0 | 89.0 | 89.0 | 89.0 | 89.0 | 83.0 | 83.0 | 83.0 | 89.0 | 73.0 | 70.0 | 70.0 | 70.0 | 73.0 | 70.0 | 64.0 | 64.0 | 64.0 | 70.0 | 82.0 | 90.0 | 84.0 | 83.0 | 87.0 | 88.0 | 84.0 | 78.0 | 76.0 | 90.0 | 88.0 | 85.0 | 90.0 | 90.0 | 80.0 | 80.0 | 90.0 | 83.0 | 62.0 | 82.0 | 69.0 | 35.0 | 91.0 | 83.0 | 79.0 | 87.0 | 59.0 | 47.0 | 48.0 | 14.0 | 8.0 | 14.0 | 13.0 | 14.0 |
| 1 | Cristiano Ronaldo | 33 | Portugal | 94 | 94 | Juventus | 77000000.0 | 405000.0 | 2228 | Right | 5.0 | 4.0 | 5.0 | High/ Low | C. Ronaldo | ST | 7.0 | 2018 | 2022 | 6.166667 | 183.0 | 94.0 | 94.0 | 94.0 | 92.0 | 93.0 | 93.0 | 93.0 | 92.0 | 91.0 | 91.0 | 91.0 | 91.0 | 84.0 | 84.0 | 84.0 | 91.0 | 68.0 | 64.0 | 64.0 | 64.0 | 68.0 | 64.0 | 56.0 | 56.0 | 56.0 | 64.0 | 84.0 | 94.0 | 89.0 | 81.0 | 87.0 | 88.0 | 81.0 | 76.0 | 77.0 | 94.0 | 89.0 | 91.0 | 87.0 | 96.0 | 70.0 | 95.0 | 95.0 | 88.0 | 79.0 | 93.0 | 63.0 | 29.0 | 95.0 | 82.0 | 85.0 | 95.0 | 28.0 | 31.0 | 23.0 | 7.0 | 11.0 | 15.0 | 14.0 | 11.0 |
# Horizontal bars of Value for the ten most valuable players.
sns.barplot(data=top_value, x='Value', y='Name')
<AxesSubplot:xlabel='Value', ylabel='Name'>
# Number of players at each age. The vector is passed as `x=` because
# seaborn 0.12+ accepts only `data` positionally.
sns.countplot(x=data['Age'])
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
<AxesSubplot:xlabel='Age', ylabel='count'>
# Overall rating (x) against Potential (y), one point per row.
plt.figure(figsize=(8, 8))
plt.scatter(data['Overall'], data['Potential'])
plt.xlabel("Overall rating")
plt.ylabel("Potential")
plt.title("Scatterplot of Potential v/s Overall Rating")
Text(0.5, 1.0, 'Scatterplot of Potential v/s Overall Rating')
# Twenty clubs with the highest mean player Overall rating (ascending order).
mean_overall_by_club = data.groupby('Club')['Overall'].mean().reset_index()
club = mean_overall_by_club.sort_values('Overall', ascending=True).tail(20)
sns.barplot(data=club, x='Overall', y='Club')
<AxesSubplot:xlabel='Overall', ylabel='Club'>
# Number of players per preferred foot. The vector is passed as `x=` because
# seaborn 0.12+ accepts only `data` positionally.
sns.countplot(x=data['Preferred Foot'])
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
<AxesSubplot:xlabel='Preferred Foot', ylabel='count'>
# Value (x) against Wage (y), one point per row.
plt.figure(figsize=(8, 8))
plt.scatter(data['Value'], data['Wage'])
plt.xlabel("Value")
plt.ylabel("Wage")
plt.title("Scatterplot of Value v/s Wage")
Text(0.5, 1.0, 'Scatterplot of Value v/s Wage')
# Number of players per position, on a wide figure so the labels fit.
plt.figure(figsize=(15, 5))
# The vector is passed as `x=` because seaborn 0.12+ accepts only `data`
# positionally.
sns.countplot(x=data['Position'])
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
<AxesSubplot:xlabel='Position', ylabel='count'>
# Distribution of Value: density-normalised histogram with a KDE curve.
# histplot is the axes-level replacement for the deprecated distplot.
sns.histplot(data['Value'], kde=True, stat='density')
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
<AxesSubplot:xlabel='Value', ylabel='Density'>
# Distribution of Wage: density-normalised histogram with a KDE curve.
# histplot is the axes-level replacement for the deprecated distplot.
sns.histplot(data['Wage'], kde=True, stat='density')
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
<AxesSubplot:xlabel='Wage', ylabel='Density'>
# Mean Height per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_height = data.groupby('Name')['Height'].mean().reset_index()
player = mean_height.sort_values('Height', ascending=True).tail(20)
sns.barplot(data=player, x='Height', y='Name')
plt.title("Top 20 tallest players")
Text(0.5, 1.0, 'Top 20 tallest players')
# Mean Weight per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_weight = data.groupby('Name')['Weight'].mean().reset_index()
player = mean_weight.sort_values('Weight', ascending=True).tail(20)
sns.barplot(data=player, x='Weight', y='Name')
plt.title("Top 20 heaviest players")
Text(0.5, 1.0, 'Top 20 heaviest players')
'A Akinfenwa' is the heaviest player.
# Mean Crossing per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_crossing = data.groupby('Name')['Crossing'].mean().reset_index()
player = mean_crossing.sort_values('Crossing', ascending=True).tail(20)
sns.barplot(data=player, x='Crossing', y='Name')
plt.title("Top 20 players who are good in crossing")
Text(0.5, 1.0, 'Top 20 players who are good in crossing')
# Mean Finishing per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_finishing = data.groupby('Name')['Finishing'].mean().reset_index()
player = mean_finishing.sort_values('Finishing', ascending=True).tail(20)
sns.barplot(data=player, x='Finishing', y='Name')
plt.title("Top 20 players who are good in finishing the ball")
Text(0.5, 1.0, 'Top 20 players who are good in finishing the ball')
# Mean HeadingAccuracy per player name; keep the 20 largest means (ascending).
# Rows that share a Name are averaged together by the groupby.
mean_heading = data.groupby('Name')['HeadingAccuracy'].mean().reset_index()
player = mean_heading.sort_values('HeadingAccuracy', ascending=True).tail(20)
sns.barplot(data=player, x='HeadingAccuracy', y='Name')
plt.title("Top 20 players who are good at heading the ball")
Text(0.5, 1.0, 'Top 20 players who are good at heading the ball')
# Mean ShortPassing per player name; keep the 20 largest means (ascending).
# Rows that share a Name are averaged together by the groupby.
mean_short_passing = data.groupby('Name')['ShortPassing'].mean().reset_index()
player = mean_short_passing.sort_values('ShortPassing', ascending=True).tail(20)
sns.barplot(data=player, x='ShortPassing', y='Name')
plt.title("Top 20 players who are good at short passing")
Text(0.5, 1.0, 'Top 20 players who are good at short passing')
# Mean Volleys per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_volleys = data.groupby('Name')['Volleys'].mean().reset_index()
player = mean_volleys.sort_values('Volleys', ascending=True).tail(20)
sns.barplot(data=player, x='Volleys', y='Name')
plt.title("Top 20 players who are good at Volleys")
Text(0.5, 1.0, 'Top 20 players who are good at Volleys')
# Mean Dribbling per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_dribbling = data.groupby('Name')['Dribbling'].mean().reset_index()
player = mean_dribbling.sort_values('Dribbling', ascending=True).tail(20)
sns.barplot(data=player, x='Dribbling', y='Name')
plt.title("Top 20 players who are good at Dribbling")
Text(0.5, 1.0, 'Top 20 players who are good at Dribbling')
# Mean Curve per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_curve = data.groupby('Name')['Curve'].mean().reset_index()
player = mean_curve.sort_values('Curve', ascending=True).tail(20)
sns.barplot(data=player, x='Curve', y='Name')
plt.title("Top 20 players who are good at curving the ball")
Text(0.5, 1.0, 'Top 20 players who are good at curving the ball')
# Mean LongPassing per player name; keep the 20 largest means (ascending).
# Rows that share a Name are averaged together by the groupby.
mean_long_passing = data.groupby('Name')['LongPassing'].mean().reset_index()
player = mean_long_passing.sort_values('LongPassing', ascending=True).tail(20)
sns.barplot(data=player, x='LongPassing', y='Name')
plt.title("Top 20 players who are good at long pass")
Text(0.5, 1.0, 'Top 20 players who are good at long pass')
# Mean BallControl per player name; keep the 20 largest means (ascending).
# Rows that share a Name are averaged together by the groupby.
mean_ball_control = data.groupby('Name')['BallControl'].mean().reset_index()
player = mean_ball_control.sort_values('BallControl', ascending=True).tail(20)
sns.barplot(data=player, x='BallControl', y='Name')
plt.title("Top 20 players who are good at ball control")
Text(0.5, 1.0, 'Top 20 players who are good at ball control')
# Mean Acceleration per player name; keep the 20 largest means (ascending).
# Rows that share a Name are averaged together by the groupby.
mean_acceleration = data.groupby('Name')['Acceleration'].mean().reset_index()
player = mean_acceleration.sort_values('Acceleration', ascending=True).tail(20)
sns.barplot(data=player, x='Acceleration', y='Name')
plt.title("Top 20 players who are good at accelerating the ball")
Text(0.5, 1.0, 'Top 20 players who are good at accelerating the ball')
# Mean SprintSpeed per player name; keep the 20 largest means (ascending).
# Rows that share a Name are averaged together by the groupby.
mean_sprint_speed = data.groupby('Name')['SprintSpeed'].mean().reset_index()
player = mean_sprint_speed.sort_values('SprintSpeed', ascending=True).tail(20)
sns.barplot(data=player, x='SprintSpeed', y='Name')
plt.title("Top 20 players who are good at sprinting")
Text(0.5, 1.0, 'Top 20 players who are good at sprinting')
# Mean Agility per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_agility = data.groupby('Name')['Agility'].mean().reset_index()
player = mean_agility.sort_values('Agility', ascending=True).tail(20)
sns.barplot(data=player, x='Agility', y='Name')
plt.title("Top 20 players agile players")
Text(0.5, 1.0, 'Top 20 players agile players')
# Mean Reactions per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_reactions = data.groupby('Name')['Reactions'].mean().reset_index()
player = mean_reactions.sort_values('Reactions', ascending=True).tail(20)
sns.barplot(data=player, x='Reactions', y='Name')
plt.title("Top 20 players who is best in reaction")
Text(0.5, 1.0, 'Top 20 players who is best in reaction')
Cristiano Ronaldo has the best reactions among all players.
# Same Reactions ranking as the preceding cell, recomputed and redrawn.
# Rows that share a Name are averaged together by the groupby.
mean_reactions = data.groupby('Name')['Reactions'].mean().reset_index()
player = mean_reactions.sort_values('Reactions', ascending=True).tail(20)
sns.barplot(data=player, x='Reactions', y='Name')
plt.title("Top 20 players who is best in reaction")
Text(0.5, 1.0, 'Top 20 players who is best in reaction')
# Mean Balance per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_balance = data.groupby('Name')['Balance'].mean().reset_index()
player = mean_balance.sort_values('Balance', ascending=True).tail(20)
sns.barplot(data=player, x='Balance', y='Name')
plt.title("Top 20 players with best balance")
Text(0.5, 1.0, 'Top 20 players with best balance')
# Mean ShotPower per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_shot_power = data.groupby('Name')['ShotPower'].mean().reset_index()
player = mean_shot_power.sort_values('ShotPower', ascending=True).tail(20)
sns.barplot(data=player, x='ShotPower', y='Name')
plt.title("Top 20 players with best shot power")
Text(0.5, 1.0, 'Top 20 players with best shot power')
# Mean Jumping per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_jumping = data.groupby('Name')['Jumping'].mean().reset_index()
player = mean_jumping.sort_values('Jumping', ascending=True).tail(20)
sns.barplot(data=player, x='Jumping', y='Name')
plt.title("Top 20 players who are best jumpers")
Text(0.5, 1.0, 'Top 20 players who are best jumpers')
# Mean Stamina per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_stamina = data.groupby('Name')['Stamina'].mean().reset_index()
player = mean_stamina.sort_values('Stamina', ascending=True).tail(20)
sns.barplot(data=player, x='Stamina', y='Name')
plt.title("Top 20 players having high stamina")
Text(0.5, 1.0, 'Top 20 players having high stamina')
# Mean Strength per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_strength = data.groupby('Name')['Strength'].mean().reset_index()
player = mean_strength.sort_values('Strength', ascending=True).tail(20)
sns.barplot(data=player, x='Strength', y='Name')
plt.title("Top 20 players with high strength")
Text(0.5, 1.0, 'Top 20 players with high strength')
# Mean LongShots per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_long_shots = data.groupby('Name')['LongShots'].mean().reset_index()
player = mean_long_shots.sort_values('LongShots', ascending=True).tail(20)
sns.barplot(data=player, x='LongShots', y='Name')
plt.title("Top 20 players who are good at long shot")
Text(0.5, 1.0, 'Top 20 players who are good at long shot')
# Mean Aggression per player name; keep the 20 largest means (ascending).
# Rows that share a Name are averaged together by the groupby.
mean_aggression = data.groupby('Name')['Aggression'].mean().reset_index()
player = mean_aggression.sort_values('Aggression', ascending=True).tail(20)
sns.barplot(data=player, x='Aggression', y='Name')
plt.title("Top 20 most aggressive players")
Text(0.5, 1.0, 'Top 20 most aggressive players')
# Mean Penalties per player name; keep the 20 largest means (ascending order).
# Rows that share a Name are averaged together by the groupby.
mean_penalties = data.groupby('Name')['Penalties'].mean().reset_index()
player = mean_penalties.sort_values('Penalties', ascending=True).tail(20)
sns.barplot(data=player, x='Penalties', y='Name')
plt.title("Top 20 players with penalties")
Text(0.5, 1.0, 'Top 20 players with penalties')
# Mean SlidingTackle per player name; keep the 20 largest means (ascending).
# Rows that share a Name are averaged together by the groupby.
mean_sliding_tackle = data.groupby('Name')['SlidingTackle'].mean().reset_index()
player = mean_sliding_tackle.sort_values('SlidingTackle', ascending=True).tail(20)
sns.barplot(data=player, x='SlidingTackle', y='Name')
plt.title("Top 20 players with best sliding tackle")
Text(0.5, 1.0, 'Top 20 players with best sliding tackle')
# Mean GKPositioning per player name; keep the 20 largest means (ascending).
# Rows that share a Name are averaged together by the groupby.
mean_gk_positioning = data.groupby('Name')['GKPositioning'].mean().reset_index()
player = mean_gk_positioning.sort_values('GKPositioning', ascending=True).tail(20)
sns.barplot(data=player, x='GKPositioning', y='Name')
plt.title("Top 20 players with best GKPositioning")
Text(0.5, 1.0, 'Top 20 players with best GKPositioning')
# Age (x) against Overall (y), one point per row.
plt.figure(figsize=(8, 8))
plt.scatter(data['Age'], data['Overall'])
plt.xlabel("Age")
plt.ylabel("Overall")
plt.title("Scatterplot of Age v/s Overall")
Text(0.5, 1.0, 'Scatterplot of Age v/s Overall')
# Age (x) against Value (y), one point per row.
plt.figure(figsize=(8, 8))
plt.scatter(data['Age'], data['Value'])
plt.xlabel("Age")
plt.ylabel("Value")
plt.title("Scatterplot of Age v/s value")
Text(0.5, 1.0, 'Scatterplot of Age v/s value')
# Age (x) against Crossing (y), one point per row.
plt.figure(figsize=(8, 8))
plt.scatter(data['Age'], data['Crossing'])
plt.xlabel("Age")
plt.ylabel("Crossing")
plt.title("Scatterplot of Age v/s Crossing ")
Text(0.5, 1.0, 'Scatterplot of Age v/s Crossing ')
# Age (x) against Acceleration (y), one point per row.
plt.figure(figsize=(8, 8))
plt.scatter(data['Age'], data['Acceleration'])
plt.xlabel("Age")
plt.ylabel("Acceleration")
plt.title("Scatterplot of Age v/s Acceleration")
Text(0.5, 1.0, 'Scatterplot of Age v/s Acceleration')
# Age (x) against ShortPassing (y), one point per row.
plt.figure(figsize=(8, 8))
plt.scatter(data['Age'], data['ShortPassing'])
plt.xlabel("Age")
plt.ylabel("ShortPassing")
plt.title("Scatterplot of Age v/s ShortPassing")
Text(0.5, 1.0, 'Scatterplot of Age v/s ShortPassing')
# Age (x) against Reactions (y), one point per row.
plt.figure(figsize=(8, 8))
plt.scatter(data['Age'], data['Reactions'])
plt.xlabel("Age")
plt.ylabel("Reactions")
plt.title("Scatterplot of Age v/s Reactions")
Text(0.5, 1.0, 'Scatterplot of Age v/s Reactions')
# Dribbling (x) against BallControl (y), one point per row.
plt.figure(figsize=(8, 8))
plt.scatter(data['Dribbling'], data['BallControl'])
plt.xlabel("Dribbling")
plt.ylabel("BallControl")
plt.title("Scatterplot of Dribbling v/s BallControl")
Text(0.5, 1.0, 'Scatterplot of Dribbling v/s BallControl')
# Regression of Dribbling on BallControl, one panel per preferred foot.
sns.lmplot(data=data, x='BallControl', y='Dribbling', col='Preferred Foot')
<seaborn.axisgrid.FacetGrid at 0x27bf82e4ac0>
# Donut chart of how often each Work Rate category occurs.
pie_options = dict(
    names="Work Rate",
    title="<b>Counts in Work Rate</b>",
    hole=0.4,
    template="plotly_dark",
    width=600,
    height=500,
)
fig = px.pie(data, **pie_options)
fig.show()
# Box plot of Age. The vector is passed as `x=` because seaborn 0.12+
# accepts only `data` positionally.
sns.boxplot(x=data['Age'])
plt.title("Boxplot of Age")
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
Text(0.5, 1.0, 'Boxplot of Age')
# Box plot of Overall. The vector is passed as `x=` because seaborn 0.12+
# accepts only `data` positionally.
sns.boxplot(x=data['Overall'])
plt.title("Boxplot of Overall")
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
Text(0.5, 1.0, 'Boxplot of Overall')
# Box plot of Potential. The vector is passed as `x=` because seaborn 0.12+
# accepts only `data` positionally.
sns.boxplot(x=data['Potential'])
plt.title("Boxplot of Potential")
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
Text(0.5, 1.0, 'Boxplot of Potential')
# Box plot of Value. The vector is passed as `x=` because seaborn 0.12+
# accepts only `data` positionally.
sns.boxplot(x=data['Value'])
plt.title("Boxplot of Value")
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
Text(0.5, 1.0, 'Boxplot of Value')
# Box plot of Wage. The vector is passed as `x=` because seaborn 0.12+
# accepts only `data` positionally.
sns.boxplot(x=data['Wage'])
plt.title('Boxplot of Wage')
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
Text(0.5, 1.0, 'Boxplot of Wage')
# Box plot of Special. The vector is passed as `x=` because seaborn 0.12+
# accepts only `data` positionally.
sns.boxplot(x=data['Special'])
plt.title('Boxplot of Special')
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
Text(0.5, 1.0, 'Boxplot of Special')
# Box plot of International Reputation. The vector is passed as `x=` because
# seaborn 0.12+ accepts only `data` positionally.
sns.boxplot(x=data['International Reputation'])
plt.title('Boxplot of "International Reputation"')
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
Text(0.5, 1.0, 'Boxplot of "International Reputation"')
# Box plot of Weak Foot. The vector is passed as `x=` because seaborn 0.12+
# accepts only `data` positionally.
sns.boxplot(x=data['Weak Foot'])
plt.title("Boxplot of 'Weak Foot'")
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
Text(0.5, 1.0, "Boxplot of 'Weak Foot'")
# Box plot of Skill Moves. The vector is passed as `x=` because seaborn 0.12+
# accepts only `data` positionally.
sns.boxplot(x=data['Skill Moves'])
plt.title("Boxplot of 'Skill Moves'")
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
Text(0.5, 1.0, "Bixplot of 'Skill Moves'")
There is only one outlier in 'Skill Moves'.
# Box plot of Height. The vector is passed as `x=` because seaborn 0.12+
# accepts only `data` positionally.
sns.boxplot(x=data['Height'])
plt.title("Boxplot of 'Height'")
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
Text(0.5, 1.0, "Boxplot of 'Height'")
# Box plot of Weight. The vector is passed as `x=` because seaborn 0.12+
# accepts only `data` positionally.
sns.boxplot(x=data['Weight'])
plt.title("Boxplot of 'Weight'")
C:\Users\Rahul Gupta\anaconda3\envs\PyR\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
Text(0.5, 1.0, "Boxplot of 'Weight'")
# Names of the columns stored as int64 or float64.
numeric_frame = data.select_dtypes(include=['int64', 'float64'])
numerical_col = numeric_frame.columns
#Lower and Upper fence of Outliers Identification
def lower_limit(x):
    """Return the lower outlier fence of ``x``: Q1 - 1.5 * IQR.

    ``x`` is any array-like accepted by ``np.quantile``. Values below the
    returned fence are conventionally treated as outliers.
    """
    # Both quartiles are computed in a single call.
    q1, q3 = np.quantile(x, [0.25, 0.75])
    return q1 - 1.5 * (q3 - q1)
def upper_limit(x):
    """Return the upper outlier fence of ``x``: Q3 + 1.5 * IQR.

    ``x`` is any array-like accepted by ``np.quantile``. Values above the
    returned fence are conventionally treated as outliers.
    """
    q1, q3 = np.quantile(x, [0.25, 0.75])
    # The fence is measured from the third quartile; starting from Q1 would
    # place it one IQR too low and flag ordinary values as outliers.
    return q3 + 1.5 * (q3 - q1)
# Drop the columns that are excluded from clustering, then standardise the
# remaining ones (presumably all numeric, since StandardScaler is applied).
excluded_columns = [
    'Name', 'Nationality', 'Club', 'Value', 'Wage', 'Preferred Foot',
    'Work Rate', 'Body Type', 'Position', 'Joined', 'Contract Valid Until',
    'Height', 'Weight',
    'LS', 'ST', 'RS', 'LW', 'LF', 'CF', 'RF', 'RW', 'LAM', 'CAM', 'RAM',
    'LM', 'LCM', 'CM', 'RCM', 'RM', 'LWB', 'LDM', 'CDM', 'RDM', 'RWB',
    'LB', 'LCB', 'CB', 'RCB', 'RB',
]
data2 = data.drop(excluded_columns, axis=1)
scaler = StandardScaler()
X = scaler.fit_transform(data2.values)
print(X)
[[ 1.27604112 4.00853692 3.71546904 ... 1.41220981 1.12857961 -0.80764857] [ 1.7102333 4.00853692 3.71546904 ... 1.41220981 1.12857961 0.1591048 ] [ 0.19056065 3.71825816 3.55156207 ... 1.41220981 1.45376768 0.1591048 ] ... [-1.98040027 -2.81301389 -0.71001907 ... -0.17812724 -1.47292492 0.80360705] [-1.76330418 -2.81301389 -0.87392604 ... 1.0941424 -0.82254879 -0.48539744] [-1.98040027 -2.95815327 -0.87392604 ... -0.49619465 0.47820348 -0.48539744]]
# For each candidate k, fit k-means on X and record the inertia (WCSS) and
# the silhouette score of the resulting labels.
wcss = []
score = []
for i in (3, 5, 7):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=0)
    # fit_predict both fits the model and returns the labels, so a separate
    # fit() call beforehand would only repeat the same work.
    cluster_labels = kmeans.fit_predict(X)
    score.append(metrics.silhouette_score(X, cluster_labels))
    wcss.append(kmeans.inertia_)
# Elbow plot: WCSS against the candidate cluster counts.
sns.set()
candidate_k = [3, 5, 7]
plt.plot(candidate_k, wcss)
plt.title("The elbow plot graph")
plt.xlabel("Number of Clusters")
plt.ylabel("WCSS")
plt.show()
# Silhouette score for each candidate cluster count.
k_candidates = [3, 5, 7]
sns.barplot(x=k_candidates, y=score)
plt.title("Barplot Representing Silhouette Score of Different k Values")
plt.xlabel("k")
plt.ylabel("Silhouette Score")
Text(0, 0.5, 'Silhouette Score')
# Seven-cluster k-means on the standardised matrix; print the labels.
kmeans1 = KMeans(n_clusters=7, init='k-means++', random_state=0)
kmeans1.fit(X)
Y = kmeans1.labels_
print(Y)
[6 6 6 ... 0 0 3]
# Three-cluster k-means on the standardised matrix; print the labels.
kmeans2 = KMeans(n_clusters=3, init='k-means++', random_state=0)
kmeans2.fit(X)
Y = kmeans2.labels_
print(Y)
[0 0 0 ... 1 1 2]
# Project X onto its first two principal components, named P1 and P2.
pca = PCA(n_components=2)
X_principal = pd.DataFrame(pca.fit_transform(X), columns=['P1', 'P2'])
import scipy.cluster.hierarchy as shc

# Ward-linkage dendrogram of the two principal components.
plt.figure(figsize=(8, 8))
plt.title('Visualising the data')
ward_linkage = shc.linkage(X_principal, method='ward')
Dendrogram = shc.dendrogram(ward_linkage)
# AgglomerativeClustering is imported here, right before its first use, in
# the same way DBSCAN and silhouette_score are imported next to their cells.
from sklearn.cluster import AgglomerativeClustering

#k=2
ac2 = AgglomerativeClustering(n_clusters = 2)
plt.figure(figsize =(6, 6))
# Colour each point in PCA space by its agglomerative cluster label.
plt.scatter(X_principal['P1'], X_principal['P2'],
            c = ac2.fit_predict(X_principal), cmap ='rainbow')
plt.show()
# Agglomerative clustering with k=3, plotted in PCA space.
ac3 = AgglomerativeClustering(n_clusters=3)
plt.figure(figsize=(6, 6))
cluster_ids_k3 = ac3.fit_predict(X_principal)
plt.scatter(X_principal['P1'], X_principal['P2'], c=cluster_ids_k3, cmap='rainbow')
plt.show()
# Agglomerative clustering with k=4, plotted in PCA space.
ac4 = AgglomerativeClustering(n_clusters=4)
plt.figure(figsize=(6, 6))
cluster_ids_k4 = ac4.fit_predict(X_principal)
plt.scatter(X_principal['P1'], X_principal['P2'], c=cluster_ids_k4, cmap='rainbow')
plt.show()
# Agglomerative clustering with k=5, plotted in PCA space.
ac5 = AgglomerativeClustering(n_clusters=5)
plt.figure(figsize=(6, 6))
cluster_ids_k5 = ac5.fit_predict(X_principal)
plt.scatter(X_principal['P1'], X_principal['P2'], c=cluster_ids_k5, cmap='rainbow')
plt.show()
# Agglomerative clustering with k=6, plotted in PCA space.
ac6 = AgglomerativeClustering(n_clusters=6)
plt.figure(figsize=(6, 6))
cluster_ids_k6 = ac6.fit_predict(X_principal)
plt.scatter(X_principal['P1'], X_principal['P2'], c=cluster_ids_k6, cmap='rainbow')
plt.show()
from sklearn.metrics import silhouette_score

k = [2, 3, 4, 5, 6]

# Re-cluster with each agglomerative model (k = 2..6, in order) and collect
# the silhouette score of the labels it produces.
silhouette_scores = [
    silhouette_score(X_principal, model.fit_predict(X_principal))
    for model in (ac2, ac3, ac4, ac5, ac6)
]

# Bar chart comparing the scores across cluster counts.
plt.bar(k, silhouette_scores)
plt.xlabel('Number of clusters', fontsize=20)
plt.ylabel('S(i)', fontsize=20)
plt.show()
from sklearn.cluster import DBSCAN

# Compute DBSCAN on the two principal components.
db = DBSCAN(eps=0.3, min_samples=10).fit(X_principal)
# Boolean mask that is True for the samples DBSCAN reports as core samples.
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise (label -1) if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
print('Estimated number of clusters: %d' % n_clusters_)
# Score the labels in the space they were fitted on (X_principal). Scoring
# against the full standardised matrix X would measure distances the
# clustering never saw.
print("Silhouette Coefficient: %0.3f"
      % metrics.silhouette_score(X_principal, labels))
Estimated number of clusters: 8 Silhouette Coefficient: -0.099